{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from buckaroo.dataflow.dataflow import StylingAnalysis\n",
    "from buckaroo.buckaroo_widget import BuckarooWidget\n",
    "from buckaroo.customizations.analysis import TypingStats\n",
    "N = 500\n",
    "NA = pd.NA\n",
    "ROWS = 200\n",
    "typed_df = pd.DataFrame({'int_col':np.random.randint(1,50, ROWS), 'float_col': np.random.randint(1,30, ROWS)/.7,\n",
    "                         \"str_col\": [\"foobar\"]* ROWS})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1",
   "metadata": {},
   "source": [
    "This notebook generally follows the order of [DFWhole.ts](https://github.com/paddymul/buckaroo/blob/main/packages/buckaroo-js-core/src/components/DFViewerParts/DFWhole.ts)\n",
    "\n",
    "Starting with the simple Displayers\n",
    "```js\n",
    "export interface ObjDisplayerA {\n",
    "  displayer: 'obj';}\n",
    "\n",
    "export interface BooleanDisplayerA {\n",
    "  displayer: 'boolean';}\n",
    "\n",
    "export interface StringDisplayerA {\n",
    "  displayer: 'string';\n",
    "  max_length?: number;} \n",
    "  \n",
    "export interface FloatDisplayerA {\n",
    "  displayer: 'float';\n",
    "  min_fraction_digits: number;\n",
    "  max_fraction_digits: number;}\n",
    "\n",
    "export interface DatetimeDefaultDisplayerA {\n",
    "  displayer: 'datetimeDefault';}\n",
    "export interface IntegerDisplayerA {\n",
    "  displayer: 'integer';\n",
    "  min_digits: number;\n",
    "  max_digits: number;}\n",
    "\n",
    "export interface DatetimeLocaleDisplayerA {\n",
    "  displayer: 'datetimeLocaleString';\n",
    "  locale: 'en-US' | 'en-GB' | 'en-CA' | 'fr-FR' | 'es-ES' | 'de-DE' | 'ja-JP';\n",
    "  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/DateTimeFormat/DateTimeFormat\n",
    "  args: Intl.DateTimeFormatOptions;}\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#simpler Buckaroo that really shows the styling\n",
    "def obj_(pkey):\n",
    "    return {'primary_key_val': pkey, 'displayer_args': { 'displayer': 'obj' } }\n",
    "\n",
    "class GalleryStyling(StylingAnalysis):\n",
    "    requires_summary = ['dtype']\n",
    "    pinned_rows = [obj_('dtype')]\n",
    "class GalleryBuckaroo(BuckarooWidget):\n",
    "    analysis_klasses = [GalleryStyling, TypingStats]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "obj_df = pd.DataFrame({\n",
    "    'bools':[True, True, False, False, True, None],\n",
    "    'ints': [   5,   20,    30,   -10, 7772, None],\n",
    "    'timestamp':[\"2020-01-01 01:00Z\", \"2020-01-01 02:00Z\", \"2020-02-28 02:00Z\", \"2020-03-15 02:00Z\", None, None],\n",
    "    'dicts': [ {'a':10, 'b':20, 'c':'some string'}, None, None, None, None, None], #polars\n",
    "    'nested_dicts': [{'level_1': {'a':10, 'b':20, 'c':'some string'}}, None, None, None, None, None],\n",
    "    'lists': [['a','b'], [1,2], None, None, None, None],\n",
    "    'lists-string': [['a','b'], ['foo', 'bar'], None, None, None, None],\n",
    "    'lists-int': [[10, 20], [100, 500], [8], None, None, None]}\n",
    ")\n",
    "GalleryBuckaroo(obj_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4",
   "metadata": {},
   "outputs": [],
   "source": [
    "BuckarooWidget(obj_df, \n",
    "               component_config={'className':'asdf', \n",
    "                                 'selectionBackground': 'red'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "obj_df = pd.DataFrame({\n",
    "    'bools':[True, True, False, False, True, None],\n",
    "    'ints': [   5,   20,    30,   -10, 7772, None],\n",
    "    'dicts': [ {'a':10, 'b':20, 'c':'some string'}, None, None, None, None, None], #polars\n",
    "    'nested_dicts': [{'level_1': {'a':10, 'b':20, 'c':'some string'}}, None, None, None, None, None],\n",
    "    #'nested_dicts2': pl.Series([{'level_1': {'a':10, 'b':20, 'c':'some string'}}, None, None, None, None, None], dtype=pl.Object)}\n",
    "    }\n",
    ")\n",
    "GalleryBuckaroo(obj_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "bool_ser = pd.Series([True, True, False, False, True, None])\n",
    "bool_df = pd.DataFrame({\n",
    "    'bools_obj_displayer':bool_ser,\n",
    "    'bools_boolean_displayer': bool_ser})\n",
    "#fix         #'bools_bool_checkbox_displayer':  {'displayer_args': {'displayer': 'boolean_checkbox'}\n",
    "BuckarooWidget(\n",
    "    bool_df, \n",
    "    column_config_overrides={\n",
    "        'bools_obj_displayer':  {'displayer_args': {'displayer': 'obj'}},      \n",
    "        'bools_boolean_displayer': {'displayer_args': {'displayer': 'boolean'}}}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "string_ser = pd.Series([\"asdf\", \"qwerty\", \"really long string, much  much longer\", None,  \"A\"])\n",
    "string_df = pd.DataFrame({\n",
    "    'strings_obj_displayer':string_ser,\n",
    "    'strings_string_displayer':string_ser,\n",
    "    'strings_string_displayer_max_len':string_ser})\n",
    "BuckarooWidget(\n",
    "    string_df, \n",
    "    column_config_overrides={\n",
    "        'strings_obj_displayer':  {'displayer_args': {'displayer': 'obj'}},      \n",
    "        'strings_string_displayer': {'displayer_args': {'displayer': 'string'}},\n",
    "        'strings_string_displayer_max_len': {'displayer_args': {'displayer': 'string', 'max_length':15}}\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8",
   "metadata": {},
   "outputs": [],
   "source": [
    "float_ser = pd.Series(\n",
    "    [5, -8, 13.23, -8.01, -999.345245234, None])\n",
    "\n",
    "def float_(min_digits, max_digits):\n",
    "    return {'displayer_args': { 'displayer': 'float', 'min_fraction_digits':min_digits, 'max_fraction_digits': max_digits}}\n",
    "float_df = pd.DataFrame({\n",
    "        'float_obj_displayer':float_ser,\n",
    "        'float_float_displayer_1__3':float_ser,\n",
    "        'float_float_displayer_0__3':float_ser,\n",
    "        'float_float_displayer_3__3':float_ser,\n",
    "        'float_float_displayer_3_13':float_ser})\n",
    "BuckarooWidget(\n",
    "    float_df,\n",
    "    column_config_overrides={\n",
    "        'float_obj_displayer':  {'displayer_args': {'displayer': 'obj'}},      \n",
    "        'float_float_displayer_1__3' : float_(1,3),\n",
    "        'float_float_displayer_0__3' : float_(0,3),\n",
    "        'float_float_displayer_3__3' : float_(3,3),\n",
    "        'float_float_displayer_3_13' : float_(3,13)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "datetime_ser = pd.to_datetime(\n",
    "    pd.Series([\"2020-01-01 01:00Z\", \"2020-01-01 02:00Z\", \"2020-02-28 02:00Z\", \"2020-03-15 02:00Z\", None]))\n",
    "datetime_df = pd.DataFrame({\n",
    "        'timestamp_obj_displayer':datetime_ser,\n",
    "        'timestamp_datetime_default_displayer':datetime_ser,\n",
    "        'timestamp_datetime_locale_en-US':datetime_ser,\n",
    "        'timestamp_datetime_locale_en-US-Long':datetime_ser,\n",
    "        'timestamp_datetime_locale_en-GB':datetime_ser,})\n",
    "def locale(locale, args={}):\n",
    "    return {'displayer_args': {'displayer': 'datetimeLocaleString',\n",
    "                                'locale':locale,\n",
    "                                'args':args}}\n",
    "BuckarooWidget(datetime_df,\n",
    "    column_config_overrides={\n",
    "        'timestamp_obj_displayer':  {'displayer_args': {'displayer': 'obj'}},    \n",
    "        'timestamp_datetime_default_displayer' : {'displayer_args':  {  'displayer': 'datetimeDefault'}},\n",
    "        'timestamp_datetime_locale_en-US' :locale('en-US'),\n",
    "        'timestamp_datetime_locale_en-US-Long': locale('en-US', { 'weekday': 'long'}),\n",
    "        'timestamp_datetime_locale_en-GB' : locale('en-GB')})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "link_df = pd.DataFrame({'raw':      ['https://github.com/paddymul/buckaroo', 'https://github.com/pola-rs/polars'],\n",
    "                    'linkify' : ['https://github.com/paddymul/buckaroo', 'https://github.com/pola-rs/polars']})\n",
    "BuckarooWidget(link_df,\n",
    "               column_config_overrides={'linkify': {'displayer_args':  {  'displayer': 'linkify'}}})\n",
    "#fixme no underline or blue highlighting of links... but they are links"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "histo_df = pd.DataFrame({\n",
    "    'names': ['index', 'all_NA', 'half_NA', 'longtail', 'longtail_unique'],\n",
    "     'histogram_props': ['histogram',\n",
    "          [{'name': 'NA', 'NA': 100.0}],\n",
    "          [{'name': 1, 'cat_pop': 44.0}, {'name': 'NA', 'NA': 56.0}],\n",
    "          [{'name': 'long_97', 'cat_pop': 0.0},\n",
    "           {'name': 'long_139', 'cat_pop': 0.0},\n",
    "           {'name': 'long_12', 'cat_pop': 0.0},\n",
    "           {'name': 'long_134', 'cat_pop': 0.0},\n",
    "           {'name': 'long_21', 'cat_pop': 0.0},\n",
    "           {'name': 'long_44', 'cat_pop': 0.0},\n",
    "           {'name': 'long_58', 'cat_pop': 0.0},\n",
    "           {'name': 'longtail', 'longtail': 77.0},\n",
    "           {'name': 'NA', 'NA': 20.0}],\n",
    "          [{'name': 'long_113', 'cat_pop': 0.0},\n",
    "           {'name': 'long_116', 'cat_pop': 0.0},\n",
    "           {'name': 'long_33', 'cat_pop': 0.0},\n",
    "           {'name': 'long_72', 'cat_pop': 0.0},\n",
    "           {'name': 'long_122', 'cat_pop': 0.0},\n",
    "           {'name': 'long_6', 'cat_pop': 0.0},\n",
    "           {'name': 'long_83', 'cat_pop': 0.0},\n",
    "           {'name': 'longtail', 'unique': 50.0, 'longtail': 47.0}]]})\n",
    "from buckaroo.customizations.analysis import TypingStats\n",
    "from buckaroo.buckaroo_widget import BuckarooWidget\n",
    "class PdGalleryBuckaroo(BuckarooWidget):\n",
    "    analysis_klasses = [GalleryStyling, TypingStats]\n",
    "PdGalleryBuckaroo(histo_df,\n",
    "                   column_config_overrides={\n",
    "                    'histogram_props': {'displayer_args': {'displayer': 'histogram'}}})\n",
    "#Fixme, this doesn't work with polars right now, probably related to the object dtype problem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "png_smiley = 'iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=';"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "img_df = pd.DataFrame({'raw':            [png_smiley, None],\n",
    "                    'img_displayer' : [png_smiley, None]})\n",
    "BuckarooWidget(img_df,\n",
    "               column_config_overrides={\n",
    "                   'raw':           {'displayer_args': {'displayer': 'string', 'max_length':40}},\n",
    "                   'img_displayer': {'displayer_args': {'displayer': 'Base64PNGImageDisplayer'}, 'ag_grid_specs' : {'width':150}}})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "14",
   "metadata": {},
   "source": [
    "# Tooltips"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "bw = BuckarooWidget(\n",
    "    typed_df, \n",
    "    column_config_overrides={\n",
    "        'str_col':\n",
    "            {'tooltip_config': { 'tooltip_type':'simple', 'val_column': 'int_col'}}})\n",
    "bw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16",
   "metadata": {},
   "outputs": [],
   "source": [
    "bw.df_display_args"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17",
   "metadata": {},
   "source": [
    "# Color_map_config\n",
    "```ts\n",
    "export type ColorMap = \"BLUE_TO_YELLOW\" | \"DIVERGING_RED_WHITE_BLUE\" | string[];\n",
    "\n",
    "//ColorMapRules\n",
    "export interface ColorMapRules {\n",
    "    color_rule: \"color_map\";\n",
    "    map_name: ColorMap;\n",
    "    //optional, the column to base the ranges on.  the proper histogram_bins must still be sent in for that column\n",
    "    val_column?: string;\n",
    "}\n",
    "\n",
    "//ColorCategorical rules\n",
    "export interface ColorCategoricalRules {\n",
    "    color_rule: \"color_categorical\";\n",
    "    map_name: ColorMap;\n",
    "    //optional, the column to base the ranges on.  the proper histogram_bins must still be sent in for that column\n",
    "    val_column?: string;\n",
    "}\n",
    "\n",
    "//if exist_column is not null,  set cell style to condtional_color... used for highlighting changed values or errored_rows\n",
    "export interface ColorWhenNotNullRules {\n",
    "    color_rule: \"color_not_null\";\n",
    "    conditional_color: string | \"red\";\n",
    "    exist_column: string;\n",
    "}\n",
    "\n",
    "export interface ColorFromColumn {\n",
    "    color_rule: \"color_from_column\";\n",
    "    col_name: string;\n",
    "}\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76ffd061-2159-4982-ac76-1a76306e5362",
   "metadata": {},
   "outputs": [],
   "source": [
    "BuckarooWidget(\n",
    "    typed_df, \n",
    "    column_config_overrides={\n",
    "        'float_col': {'color_map_config': {\n",
    "          'color_rule': 'color_map',\n",
    "          'map_name': 'BLUE_TO_YELLOW',\n",
    "          'val_column': 'int_col'\n",
    "        }}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02b23ff0-88c3-4bc2-bf95-e54ad4a87d33",
   "metadata": {},
   "outputs": [],
   "source": [
    "# you can also pass in your own color_map directly as an array\n",
    "BuckarooWidget(\n",
    "    pd.DataFrame({'a':[9, 10, 3,4,5,1, 1]}),\n",
    "    column_config_overrides={\n",
    "        'a': {'color_map_config': {\n",
    "          'color_rule': 'color_map',\n",
    "          'map_name': [\"pink\", \"#73ae80\", \"#90b2b3\", \"#6c83b5\", \"orange\"]\n",
    "        }}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47db894f-5743-4ef6-8845-e20122a4ce26",
   "metadata": {},
   "outputs": [],
   "source": [
    "# color categorical allows direct indexing into a color map.  \n",
    "# values that exceed the array length are displayed with the default backgroudn\n",
    "BuckarooWidget(\n",
    "    pd.DataFrame({'a':[9, 10, 1,2,3,4,5]}),\n",
    "    column_config_overrides={\n",
    "        'a': {'color_map_config': {\n",
    "          'color_rule': 'color_categorical',\n",
    "          'map_name': [\"pink\", \"#73ae80\", \"#90b2b3\", \"#6c83b5\", \"orange\"]\n",
    "        }}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "18",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "BuckarooWidget(\n",
    "    typed_df, \n",
    "    column_config_overrides={\n",
    "        'float_col': {'color_map_config': {\n",
    "          'color_rule': 'color_map',\n",
    "          'map_name': 'BLUE_TO_YELLOW',\n",
    "        }}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "19",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "error_df = pd.DataFrame({\n",
    "    'a': [10, 20, 30],\n",
    "    'err_messages': [None, \"a must be less than 19, it is 20\", \"a must be less than 19, it is 30\"]})\n",
    "\n",
    "BuckarooWidget(\n",
    "    error_df, \n",
    "    column_config_overrides={\n",
    "        'a': {'color_map_config': {\n",
    "            'color_rule': 'color_not_null',\n",
    "            'conditional_color': 'red',\n",
    "            'exist_column': 'err_messages'}}})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "03118aec-b77a-4bc7-9658-1a1620a016a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "color_df = pd.DataFrame({\n",
    "    'a': [10, 20, 30],\n",
    "    'a_colors': ['red', '#d3a', 'green']})\n",
    "\n",
    "BuckarooWidget(\n",
    "    color_df, \n",
    "    column_config_overrides={\n",
    "        'a': { 'color_map_config': {\n",
    "          'color_rule': 'color_from_column',\n",
    "          'val_column': 'a_colors'}}})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "21",
   "metadata": {},
   "source": [
    "Extra col def type\n",
    "\n",
    "`ag_grid_specs`\n",
    "only to be used for very specific hacking.  You can set any primitive property, but can't set function props\n",
    "https://github.com/ag-grid/ag-grid/blob/latest/packages/ag-grid-community/src/entities/colDef.ts"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "22",
   "metadata": {},
   "source": [
    "# Utility Code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#utility code for generating histogram data structures\n",
    "from buckaroo.buckaroo_widget import BuckarooWidget\n",
    "\n",
    "NA = pd.NA\n",
    "def rand_cat(named_p, na_per, N):\n",
    "    choices, p = [], []\n",
    "    named_total_per = sum(named_p.values()) + na_per\n",
    "    total_len = int(np.floor(named_total_per * N))\n",
    "    if named_total_per > 0:\n",
    "        for k, v in named_p.items():\n",
    "            choices.append(k)\n",
    "            p.append(v/named_total_per)\n",
    "\n",
    "        choices.append(NA)\n",
    "        p.append(na_per/named_total_per)    \n",
    "        return [np.random.choice(choices, p=p) for k in range(total_len)]\n",
    "    else:\n",
    "        return []\n",
    "\n",
    "def random_categorical(named_p, unique_per, na_per, longtail_per, N):\n",
    "    choice_arr = rand_cat(named_p, na_per, N)\n",
    "    discrete_choice_len = len(choice_arr)\n",
    "\n",
    "    longtail_count = int(np.floor(longtail_per * N))//2\n",
    "    extra_arr = []\n",
    "    for i in range(longtail_count):\n",
    "        extra_arr.append(\"long_%d\" % i)\n",
    "        extra_arr.append(\"long_%d\" % i)\n",
    "\n",
    "    unique_len = N - (len(extra_arr) + discrete_choice_len)\n",
    "    #print(\"discrete_choice_len\", discrete_choice_len, \"longtail_count\", longtail_count, \"unique_len\", unique_len)\n",
    "    for i in range(unique_len):\n",
    "        extra_arr.append(\"unique_%d\" % i)\n",
    "    all_arr = np.concatenate([choice_arr, extra_arr])\n",
    "    np.random.shuffle(all_arr)\n",
    "    return all_arr        \n",
    "cat_histo_df = pd.DataFrame({\n",
    "    'all_NA' :          [NA] * N,\n",
    "    'half_NA' :         random_categorical({1: .5}, unique_per=0,   na_per=.5, longtail_per=.0, N=N),\n",
    "    'longtail' :        random_categorical({},      unique_per=0,   na_per=.2, longtail_per=.8, N=N),\n",
    "    'longtail_unique' : random_categorical({},      unique_per=0.5, na_per=.0, longtail_per=.5, N=N),\n",
    "})\n",
    "bw = BuckarooWidget(\n",
    "    cat_histo_df, \n",
    "    pinned_rows=[\n",
    "        { 'primary_key_val': 'dtype',     'displayer_args': { 'displayer': 'obj' } },\n",
    "        { 'primary_key_val': 'histogram', 'displayer_args': { 'displayer': 'histogram' }},   \n",
    "    ])\n",
    "histogram_vals = [x for x in bw.df_data_dict['all_stats'] if x['index'] == 'histogram'][0]\n",
    "def format_histo(bw):\n",
    "    histogram_vals = [x for x in bw.df_data_dict['all_stats'] if x['index'] == 'histogram'][0]\n",
    "    return dict(\n",
    "        names = list(histogram_vals.keys()),\n",
    "        histogram_props = list(histogram_vals.values()))\n",
    "format_histo(bw)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
